*Get samples of states from each non-propensity-score-weighted regression 

* Load the merged 1920-1940 file used to rebuild the estimation samples.
* -use- takes the option -clear- (not -replace-) to discard the data
* currently in memory before loading.
use "1920_1940_merged.dta", clear

********************
*Sample for Occupational Mobility
*******************
* Rebuild the county-by-race cells used by the pooled (household)
* occupational-mobility regression and save, for every county, 0/1 flags
* telling whether the county is in the white and in the Black estimation
* sample (sampoccmobpooled1920.dta).
preserve

keep if age1920>=6 

keep if (white1920==1|black1920==1)

tab age1920
tab age1940
count

* Multiply each summed variable by the person weight, keeping its name.
* NOTE(review): constant is summed below without being multiplied by weight,
* unlike the male-only block further down -- confirm this is intended.
foreach v in occdiffhh nonocchh {
    gen `v'wt = `v'*weight
    drop `v'
    rename `v'wt `v'
}

collapse (sum) occdiffhh nonocchh constant (mean) statefip1920 hasnormalschool, ///
    by(cty_fips1920 white1920)

* Flag cells whose rounded total is below 10, then compute, within each
* state x county type x race group, the share of such cells. A state-race
* group is dropped when that share reaches 0.5 for either county type.
gen constantrd = round(constant,1)
gen below10 = constantrd<10
count if below10==1
bysort statefip hasnormalschool white1920: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool white1920: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip white1920: egen maxfracbelow10 = max(fracbelow10)

tab statefip white1920 if maxfracbelow10>=.5
drop if maxfracbelow10>=.5

* Outcome: summed occdiffhh over the total net of the summed nonocchh.
gen occdiffhhmean = occdiffhh/(constant-nonocchh)

sort cty_fips

* The regressions are run only to recover e(sample) for each race.
reghdfe occdiffhhmean hasnormalschool if white1920==1, absorb(statefip1920) cluster(statefip1920)
gen sampoccmobpooledwhitex = e(sample)==1

reghdfe occdiffhhmean hasnormalschool if white1920==0, absorb(statefip1920) vce(robust)
gen sampoccmobpooledblackx = e(sample)==1

* Spread each flag to every row of the county so that one row per county
* can carry both flags.
foreach flag in sampoccmobpooledwhite sampoccmobpooledblack {
    bysort cty_fips1920: egen `flag' = max(`flag'x)
}

drop if white1920==0 
keep cty_fips1920 sampoccmobpooledwhite sampoccmobpooledblack
save sampoccmobpooled1920.dta, replace 

restore

**********************
*Sample for male occupational mobility
**********************

* Rebuild the county-by-race cells used by the male occupational-mobility
* regression (own 1940 occupation vs. father's 1920 occupation) and save
* county-level 0/1 sample flags by race (sampoccmales1920.dta).
preserve

keep if age1920>=6
keep if male1920==1
*Keep only observations with a non-missing occ1950_pop1920 (father's occupation)
keep if occ1950_pop1920~=.

keep if (white1920==1|black1920==1)

tab age1920
tab age1940
count

* Multiply the count and each summed variable by the person weight, in place.
foreach v in constant occdiffdad nonocc1940 {
    gen `v'wt = `v'*weight
    drop `v'
    rename `v'wt `v'
}

collapse (sum) occdiffdad nonocc1940 constant  (mean)statefip1920 hasnormalschool, ///
    by(cty_fips1920 white1920)

gen constantrd = round(constant,1)
gen below10 = constantrd<10
bysort statefip hasnormalschool white1920: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool white1920: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip white1920: egen maxfracbelow10 = max(fracbelow10)

*Drop states for which 50% or more of their normal or asylum counties have sample size less than 10
tab statefip if maxfracbelow10>=.5
drop if maxfracbelow10>=.5

* Outcome: summed occdiffdad over the weighted total net of summed nonocc1940.
gen occdiffdadmean = occdiffdad/(constant-nonocc1940)

sort cty_fips

* The regressions are run only to recover e(sample) for each race.
reghdfe occdiffdadmean hasnormalschool if white1920==1, absorb(statefip1920) cluster(statefip1920)
gen sampoccmaleswhitex = e(sample)==1

reghdfe occdiffdadmean hasnormalschool if white1920==0, absorb(statefip1920) vce(robust)
gen sampoccmalesblackx = e(sample)==1

* Spread each flag to every row of the county.
foreach flag in sampoccmaleswhite sampoccmalesblack {
    bysort cty_fips1920: egen `flag' = max(`flag'x)
}

drop if white1920==0 

keep cty_fips1920 sampoccmaleswhite sampoccmalesblack
save sampoccmales1920.dta, replace

restore

*********************************
*Sample for results by parental occupation score
*********************************

* Sample flags for the results by parental occupation score.
* One pass per value of bmedmaxoccscoreparent1920 (0, then 1). Each pass
* collapses to county x race x sex cells, reruns the atlhs regression for
* the four race-sex groups to recover e(sample), and saves one row per
* county with the four flags (sampec01920.dta and sampec11920.dta).
forvalues i = 0(1)1{

preserve
keep if (white1920==1|black1920==1)

keep if bmedmaxoccscoreparent1920==`i'

* Multiply the count and the outcome by the person weight, in place.
foreach x in constant atlhs {
gen `x'wt = `x'*weight
drop `x'
rename `x'wt `x'
}

collapse (sum) constant atlhs (mean) statefip1920 hasnormalschool, by(cty_fips1920 white1920 male1920)

* Drop state x race x sex groups in which, for either county type, at
* least half of the cells have a rounded weighted total below 10.
gen constantrd = round(constant,1)
gen below10 = constantrd<10
bysort statefip hasnormalschool white1920 male1920: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool white1920 male1920: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip white1920 male1920: egen maxfracbelow10 = max(fracbelow10)

tab statefip if maxfracbelow10>=.5
drop if maxfracbelow10>=.5

gen atlhsmean = atlhs/constant

sort cty_fips

* The treatment indicator is referenced as hasnormalschool, the name used in
* the collapse and bysort statements above. No variable called
* hasnormalschool1920 is generated in this block, so referring to that name
* here can stop the run with a variable-not-found error.
reghdfe atlhsmean hasnormalschool if white1920==1 & male1920==1, absorb(statefip1920) cluster(statefip1920)
gen sampecwhitemale`i'x = e(sample)==1
tab statefip if e(sample)==1

reghdfe atlhsmean hasnormalschool if white1920==1 & male1920==0, absorb(statefip1920) cluster(statefip1920)
gen sampecwhitefemale`i'x = e(sample)==1
tab statefip if e(sample)==1

reghdfe atlhsmean hasnormalschool if white1920==0 & male1920==1, absorb(statefip1920) vce(robust)
gen sampecblackmale`i'x = e(sample)==1
tab statefip if e(sample)==1

reghdfe atlhsmean hasnormalschool if white1920==0 & male1920==0, absorb(statefip1920) vce(robust)
gen sampecblackfemale`i'x = e(sample)==1
tab statefip if e(sample)==1

* Spread each flag to every row of the county, then keep the white-male row
* as the single row per county.
* NOTE(review): a county whose white-male cell was dropped above leaves the
* file entirely, including its flags for the other groups -- confirm intended.
foreach y in sampecwhitemale`i' sampecwhitefemale`i' sampecblackmale`i' sampecblackfemale`i' {
bysort cty_fips1920: egen `y' = max(`y'x)
}

drop if white1920==0|male1920==0

keep cty_fips1920 sampec* 
save sampec`i'1920.dta, replace

restore
}

*Merge the county-level files saved above into one dataset identifying samples by outcome
use sampec01920.dta, clear

* Each merge stores its match indicator directly under its final name.
merge 1:1 cty_fips1920 using sampec11920.dta, generate(mergeec1)

merge 1:1 cty_fips1920 using sampoccmobpooled1920.dta, generate(mergeoccpooled)

merge 1:1 cty_fips1920 using sampoccmales1920.dta, generate(mergeoccmales)

* Keep the county id and the spread flags; discard the raw *x helper flags.
keep cty_fips samp*
drop samp*x

save sampwithoutpscore.dta, replace

**************
* Build the 1920 person-level file restricted to normal-school and asylum
* counties (1920_diffmergesmall.dta).
* NOTE(review): usa_00112.do presumably loads the 1920 IPUMS extract into
* memory -- confirm against that file.
do "usa_00112.do"

*Exclude people living in group quarters
keep if !inlist(gq, 3, 4)

* Parental occupation score: the larger of mother's and father's score
* (max() ignores a missing argument when the other is non-missing).
gen maxoccscoreparent = max(occscore_mom, occscore_pop)

*Now merge with Eckert crosswalk

rename stateicp icpsrst 
rename countyicp icpsrcty

joinby icpsrst icpsrcty using "EGLP_1920.dta", unmatched(both)

rename _merge merge1920crosswalk

gen cty_fips = nhgisst_1990*100 + (nhgiscty_1990/10)

*Drop crosswalk rows with no IPUMS match (2) and IPUMS counties absent from the crosswalk (1).
*None of the latter affect normal/asylum counties, so can drop them
*See parent_occ1920.do for more details.
drop if inlist(merge1920crosswalk, 1, 2)

merge m:1 cty_fips using "justnormasylum.dta", generate(mergenormasylum)

*Keep normal school and asylum counties
keep if mergenormasylum==3

rename cty_fips cty_fips1920
drop year icpsrst icpsrcty nhgisst_1990 nhgiscty_1990 nhgisst nhgiscty statenam nhgisnam area_base statenam_1990 nhgisnam_1990 icpsrst_1990 icpsrcty_1990 area us_state 

* State code = county code without its last three digits (4- or 5-digit codes only).
tostring cty_fips1920, gen(cty_fipstr)
gen statefip1920 = substr(cty_fipstr, 1, length(cty_fipstr)-3) if inlist(length(cty_fipstr), 4, 5)
destring statefip1920, replace

*Drop DE, Georgia, RI, SC, NV, WY, AZ, NM, AK, HI because they don't have at least one normal school and one asylum county

drop if inlist(statefip, 2, 4, 10, 13, 15, 32, 35, 44, 45, 56)

save "1920_diffmergesmall.dta", replace

* Attach the 1920-1940 link file to the 1920 person file by histid1920.
* Rows that exist only in the link file (indicator 1) are discarded;
* merged1920 marks 1920 persons that were matched.
use "1920_1940.dta", clear

merge 1:m histid1920 using "1920_diffmergesmall.dta", generate(mergetree1920)

gen merged1920 = mergetree1920==3
drop if mergetree1920==1

save "1920_diffmergesmall_censustree.dta", replace

* Person-level 1920 covariates used in the link-selection models.
* Indicators written with "if x~=." are 0/1 where x is observed and
* missing where x is system-missing.
gen male1920 = sex==1 if sex~=.

gen white1920 = race==1 if race~=.

gen black1920 = race==2 if race~=.

* Indicator for a parental occupation score of 20 or less, defined only
* where the white indicator is 0 or 1.
* NOTE(review): a missing maxoccscoreparent compares as greater than 20 and
* is therefore coded 0 -- confirm this is intended.
gen bmedmaxoccscoreparent1920 = maxoccscoreparent<=20 if white==1 | white==0

gen cty_fips = cty_fips1920

gen occparentsnonmiss = (occ1950_pop~=.|occ1950_mom~=.)

*Only keep those that are living with at least one of their parents--to avoid getting people who traveled for school, asylum

keep if occparentsnonmiss==1

* Parent-specific indicators, father (pop) first, then mother (mom).
* NOTE(review): missing bpl_pop / bpl_mom compare as >=100, so the
* foreign-born parent indicators equal 1 when the parent value is missing.
foreach p in pop mom {
    gen head_`p' = relate_`p'==1
}

foreach p in pop mom {
    gen livewith`p' = occ1950_`p'~=.
}

foreach p in pop mom {
    gen liveinbirthstate_`p' = bpl_`p'==statefip1920
}

gen liveinbirthstate = bpl==statefip1920 if bpl~=.

gen foreignborn = bpl>=100 if bpl~=.

foreach p in pop mom {
    gen foreignborn_`p' = bpl_`p'>=100
}

gen famunit_1 = famunit==1 if famunit~=.

egen taghistid1920 = tag(histid1920)

* Missing parental ages are set to 0.
foreach p in pop mom {
    replace age_`p' = 0 if age_`p'==.
}

*Merge to 1940 census file to identify people who merge to both the 1920 and 1940 census files

merge m:1 histid1940 using "1940_small.dta", generate(mergetree1940)
drop if mergetree1940==2

* 1 when the person was matched in both the 1920 and the 1940 merge.
gen merge20and40 = mergetree1920==3 & mergetree1940==3

* Attach the county-level sample flags built earlier in this file.
merge m:1 cty_fips1920 using sampwithoutpscore.dta, generate(mergesampwithoutpscore)

*Table A24: Testing for representativeness of Census Tree Links, White Individuals
*Table A25: Testing for representativeness of Census Tree Links, Black Individuals
* For each race, the link indicator is regressed on the 1920 covariates with
* county fixed effects in four subsamples, in this order: males with
* bmedmaxoccscoreparent1920 equal to 1, males with 0, females with 1,
* females with 0. The first column of each race replaces the output file,
* later columns are appended.

local control maxoccscoreparent age_mom age_pop head_pop head_mom  livewithpop livewithmom ///
    liveinbirthstate liveinbirthstate_pop liveinbirthstate_mom foreignborn foreignborn_pop ///
    foreignborn_mom nsibs famunit_1

foreach race in white black {
    if "`race'"=="white" {
        local sheet selection_1920_White.xls
    }
    else {
        local sheet selection_1920_Black.xls
    }
    local mode replace
    foreach sex in male female {
        local ismale = ("`sex'"=="male")
        foreach half in 1 0 {
            reghdfe merge20and40 `control' if  `race'1920==1 & male1920==`ismale' & bmedmaxoccscoreparent1920==`half' & sampec`race'`sex'`half'==1 [pw=weight], absorb(cty_fips1920) cluster(cty_fips1920)
            * The number of distinct states is reported for the first white column only.
            if "`race'`sex'`half'"=="whitemale1" {
                distinct statefip if e(sample)==1
            }
            su merge20and40 if e(sample)==1
            local mean = r(mean)
            outreg2 using `sheet', `mode' excel dec(3) addstat(dvmean, `mean') adec(3)
            local mode append
        }
    }
}

*Propensity score estimation separately for each subsample
* One probit of the link indicator per race x sex x parental-score group,
* restricted to counties flagged for that group. Predicted probabilities
* are stored as predict<race><sex>bmed<0/1>; predict is not restricted to
* the estimation sample.

local control maxoccscoreparent age_mom age_pop head_pop head_mom  livewithpop livewithmom ///
    liveinbirthstate liveinbirthstate_pop liveinbirthstate_mom foreignborn foreignborn_pop foreignborn_mom nsibs famunit_1

foreach race in white black {
    foreach sex in male female {
        local ismale = ("`sex'"=="male")
        foreach half in 1 0 {
            probit merge20and40 `control' if  `race'1920==1 & male1920==`ismale' & bmedmaxoccscoreparent1920==`half' & sampec`race'`sex'`half'==1 [pw=weight]
            distinct cty_fips1920 if e(sample)==1
            * The state tabulation is shown for the first model only.
            if "`race'`sex'`half'"=="whitemale1" {
                tab statefip if e(sample)==1
            }
            predict predict`race'`sex'bmed`half', pr
        }
    }
}

*Propensity score estimation for the occupation specifications, some of which are just by race, and some by race and sex

* By race and sex. Both sexes are restricted with the sampoccmales flag of
* their race.
* NOTE(review): the female models reuse the county flag from the male
* occupation sample -- confirm this is intended.
foreach race in white black {
    foreach sex in male female {
        local ismale = ("`sex'"=="male")
        probit merge20and40 `control' if  `race'1920==1 & male1920==`ismale' & sampoccmales`race'==1 [pw=weight]
        distinct cty_fips1920 if e(sample)==1
        predict predict`race'`sex', pr
    }
}

* By race only, restricted to the pooled occupational-mobility counties.
foreach race in white black {
    probit merge20and40 `control' if  `race'1920==1 & sampoccmobpooled`race'==1 [pw=weight]
    distinct cty_fips1920 if e(sample)==1
    predict predict`race', pr
}


* Inverse of every predicted link probability, stored as ps<name>.
local pscores predictwhitemalebmed1 predictwhitemalebmed0 predictwhitefemalebmed1 predictwhitefemalebmed0 ///
    predictblackmalebmed1 predictblackmalebmed0 predictblackfemalebmed1 predictblackfemalebmed0 ///
    predictwhitemale predictwhitefemale predictblackmale predictblackfemale ///
    predictwhite predictblack

foreach p of local pscores {
    gen ps`p' = 1/`p'
}

save "1920_1940_diffmergeall.dta", replace

* From here on only persons linked in both merges are kept.
keep if merge20and40==1

* ps: inverse probability from the model of the person's own
* race x sex x parental-score group, set only inside that group's sample.
gen ps = .
foreach race in white black {
    foreach sex in male female {
        local ismale = ("`sex'"=="male")
        foreach half in 1 0 {
            replace ps = pspredict`race'`sex'bmed`half' if `race'1920==1 & male1920==`ismale' & bmedmaxoccscoreparent1920==`half' & sampec`race'`sex'`half'==1
        }
    }
}

gen weightwithps = weight*ps

* Pooled occupational-mobility weight: race-specific model.
gen psoccmobpooled = .
foreach race in white black {
    replace psoccmobpooled = pspredict`race' if `race'1920==1 & sampoccmobpooled`race'==1
}

gen weightwithpsoccmobpooled = weight*psoccmobpooled

* Occupational-mobility weight by sex: race x sex model.
gen psoccmobbysex = .
foreach race in white black {
    foreach sex in male female {
        local ismale = ("`sex'"=="male")
        replace psoccmobbysex = pspredict`race'`sex' if `race'1920==1 & male1920==`ismale' & sampoccmales`race'==1
    }
}

gen weightwithpsoccmobbysex = weight*psoccmobbysex


* 1940 outcome variables.
* Education indicators from higrade1940; undefined when higrade1940 is
* system-missing or 99.
gen atlhs = higrade1940>=15 if higrade1940~=. & higrade1940~=99

gen atlsomecoll = higrade1940>=16 if higrade1940~=. & higrade1940~=99

gen atlcoll = higrade1940>=19 if higrade1940~=. & higrade1940~=99

* Wage income with the codes 999999 and 999998 set to missing.
foreach w in incwage1940 incwage_sp1940 {
    gen `w'U = `w' if `w'~=999999 & `w'~=999998
}

* Occupation groups from the occ1950 codes of the person and of the spouse;
* undefined when the code is system-missing or 999.
gen unsk = inrange(occ19501940,700,970) if occ19501940~=. & occ19501940~=999

gen farmer = inlist(occ19501940,100,123) if occ19501940~=. & occ19501940~=999

gen unsk_sp = inrange(occ1950_sp1940,700,970) if occ1950_sp1940~=. & occ1950_sp1940~=999

gen farmer_sp = inlist(occ1950_sp1940,100,123) if occ1950_sp1940~=. & occ1950_sp1940~=999

* Household wage income: own plus spouse; missing only when both are missing.
egen hhincwage = rowtotal(incwage1940U incwage_sp1940U), missing

gen emp = empstat1940==1 if empstat1940~=.

gen married = marst1940==1 if marst1940~=.

gen unsk1940 = inrange(occ19501940,700,970) if occ19501940~=. & occ19501940~=999

gen unsk_sp1940 = inrange(occ1950_sp1940,700,970) if occ1950_sp1940~=. & occ1950_sp1940~=999

* Household farmer indicator; undefined only when both codes are system-missing.
gen farmer1940hh = inlist(occ19501940,100,123) | inlist(occ1950_sp1940,100,123) if !(occ19501940==. & occ1950_sp1940==.)

gen farmer1940 = inlist(occ19501940,100,123)

gen whitecollar1940 = inrange(occ19501940,0,99) | inrange(occ19501940,200,490)
gen whitecollar_sp1940 = inrange(occ1950_sp1940,0,99) | inrange(occ1950_sp1940,200,490)

gen craftoper1940 = inrange(occ19501940,500,690)
gen craftoper_sp1940 = inrange(occ1950_sp1940,500,690)

gen nonocc1940 = inrange(occ19501940,980,999)
gen nonocc_sp1940 = inrange(occ1950_sp1940,980,999)

* Household-level group: 1 when either the person or the spouse is in the
* group; missing when both occupation codes are system-missing.
foreach g in unsk craftoper whitecollar {
    egen `g'hh = rowmax(`g'1940 `g'_sp1940)
    replace `g'hh = . if occ19501940==. & occ1950_sp1940==.
}

* Household non-occupational: the person is non-occupational and the spouse
* is either non-occupational or has a system-missing code.
gen nonocchh = nonocc1940==1 & (nonocc_sp1940==1 | occ1950_sp1940==.) if !(occ19501940==. & occ1950_sp1940==.)

* Tag the 1920 person-level variables with a 1920 suffix.
foreach v of varlist age sex maxoccscoreparent race occ1950_mom occ1950_pop {
    rename `v' `v'1920
}

* Father (dad, from occ1950_pop1920) and mother (mom, from occ1950_mom1920)
* occupation groups, using the same code ranges as the 1940 variables.
gen dadunsk1920 = inrange(occ1950_pop1920,700,970)
gen momunsk1920 = inrange(occ1950_mom1920,700,970)

gen dadwhitecollar1920 = inrange(occ1950_pop1920,0,99) | inrange(occ1950_pop1920,200,490)
gen momwhitecollar1920 = inrange(occ1950_mom1920,0,99) | inrange(occ1950_mom1920,200,490)

gen dadcraftoper1920 = inrange(occ1950_pop1920,500,690)
gen momcraftoper1920 = inrange(occ1950_mom1920,500,690)

gen dadnonocc1920 = inrange(occ1950_pop1920,980,999)
gen momnonocc1920 = inrange(occ1950_mom1920,980,999)

* Either parent a farmer; undefined only when both codes are system-missing.
gen parent_farmer = inlist(occ1950_pop1920,100,123) | inlist(occ1950_mom1920,100,123) if !(occ1950_pop1920==. & occ1950_mom1920==.)

gen dad_farmer = inlist(occ1950_pop1920,100,123)

* Parent-level group: 1 when either parent is in the group.
foreach g in unsk craftoper whitecollar {
    egen parent`g' = rowmax(dad`g'1920 mom`g'1920)
    replace parent`g' = . if occ1950_pop1920==. & occ1950_mom1920==.
}

* Household mobility: the 1940 household is in a group that the parents
* were not recorded in.
gen occdiffhh = (unskhh==1 & parentunsk~=1) | (craftoperhh==1 & parentcraftoper~=1) | (whitecollarhh==1 & parentwhitecollar~=1) | (farmer1940hh==1 & parent_farmer~=1)

* Same comparison between the person's own 1940 group and the father's.
gen occdiffdad = (unsk1940==1 & dadunsk~=1) | (craftoper1940==1 & dadcraftoper~=1) | (whitecollar1940==1 & dadwhitecollar~=1) | (farmer1940==1 & dad_farmer~=1)

* Parents non-occupational: both are, or one is and the other's code is
* system-missing.
gen parentnonocc = (dadnonocc1920==1 & momnonocc1920==1) | (dadnonocc1920==1 & occ1950_mom1920==.) | (momnonocc1920==1 & occ1950_pop1920==.)

gen constant = 1

gen poshhwage = hhincwage>0 & hhincwage~=.

save "1920_1940_diffmerged.dta", replace

********************
*Tables A26, A28: Occupations
*******************
* County-level regressions of household occupation outcomes and parental
* occupation shares on hasnormalschool, using weights that combine the
* person weight with the pooled inverse link probability
* (weightwithpsoccmobpooled).
* The seed is set here because permute, used below, draws random permutations.
set seed 349132



preserve

keep if age1920>=6 

keep if (white1920==1|black1920==1)

tab age1920
tab age1940
count
* Each variable listed next is multiplied by the combined weight and keeps
* its original name, so the (sum) in collapse produces weighted totals.
#delimit ;
local vars occdiffhh unskhh craftoperhh whitecollarhh nonocchh farmer1940hh male1920 parentunsk parentcraftoper parentwhitecollar parentnonocc parent_farmer constant;
foreach x of local vars{;
gen `x'wt = `x'*weightwithpsoccmobpooled;
drop `x';
rename `x'wt `x';
};

#delimit ;
collapse (sum) occdiffhh unskhh craftoperhh whitecollarhh nonocchh farmer1940hh parentunsk parentcraftoper parentwhitecollar parentnonocc parent_farmer male1920 constant (mean)statefip1920 hasnormalschool, by(cty_fips1920 white1920);

#delimit cr

* Cells with a rounded weighted total below 10 are flagged. A state-race
* group is dropped when, for either value of hasnormalschool, at least half
* of its county cells are flagged.
gen constantrd = round(constant,1)
gen below10 = constantrd<10
count if below10==1
bysort statefip hasnormalschool white1920: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool white1920: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip white1920: egen maxfracbelow10 = max(fracbelow10)

tab statefip white1920 if maxfracbelow10>=.5
drop if maxfracbelow10>=.5

* Weighted shares: weighted total of each variable over the weighted count.
local vars  parent_farmer parentunsk parentcraftoper parentwhitecollar parentnonocc unskhh craftoperhh whitecollarhh nonocchh farmer1940hh male1920 
foreach x of local vars{
gen `x'mean = `x'/constant
}

* The mobility outcome uses a denominator net of the weighted nonocchh total.
local vars occdiffhh 
foreach x of local vars{
gen `x'mean = `x'/(constant-nonocchh)
}

sort cty_fips

* For every outcome: a white regression with state fixed effects and
* state-clustered errors, a regression on the white1920==0 cells with
* robust errors, and a permutation test for the latter with 1000
* replications stratified by state. dvmean is the outcome mean in the
* estimation sample where hasnormalschool equals 0.
* NOTE(review): outreg2 is always called with append in this block, so rows
* accumulate in any existing output file -- confirm the files are cleared
* before a fresh run.
local vars occdiffhh unskhh craftoperhh whitecollarhh nonocchh  farmer1940hh male1920 parentunsk parentcraftoper parentwhitecollar  parentnonocc parent_farmer  
foreach x of local vars{

reghdfe `x'mean hasnormalschool if white1920==1, absorb(statefip1920) cluster(statefip1920)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1, d
tab statefip1920 if e(sample)==1
outreg2 using censustree19201940ps.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

reghdfe `x'mean hasnormalschool if white1920==0, absorb(statefip1920) vce(robust)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1, d
tab statefip1920 if e(sample)==1
outreg2 using censustree19201940blps.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)


#delimit ;
permute hasnormalschool _b[hasnormalschool], strata(statefip1920) reps(1000): reg `x'mean hasnormalschool i.statefip1920 if white1920==0;
#delimit cr
}

restore


*****
*Tables A27, A29: Male occupations
* County-level regressions of male 1940 occupation outcomes and of the
* father occupation shares on hasnormalschool, using weights that combine
* the person weight with the race-by-sex inverse link probability
* (weightwithpsoccmobbysex).

preserve

keep if age1920>=6
keep if male1920==1
*Keep only observations with a non-missing occ1950_pop1920 (father's occupation)
keep if occ1950_pop1920~=.

keep if (white1920==1|black1920==1)

tab age1920
tab age1940
count
* Each variable listed next is multiplied by the combined weight and keeps
* its original name, so the (sum) in collapse produces weighted totals.
#delimit ;
local vars constant occdiffdad unsk1940 craftoper1940 whitecollar1940 farmer1940 nonocc1940 dadunsk1920 dadcraftoper1920 dadwhitecollar1920 dad_farmer dadnonocc1920  ;
foreach x of local vars{;
gen `x'wt = `x'*weightwithpsoccmobbysex;
drop `x';
rename `x'wt `x';
};

#delimit ;
collapse (sum) occdiffdad unsk1940 craftoper1940 whitecollar1940 farmer1940 nonocc1940 dadunsk1920 dadcraftoper1920 dadwhitecollar1920 dad_farmer dadnonocc1920 constant  (mean)statefip1920 hasnormalschool, by(cty_fips1920 white1920);

#delimit cr

* Flag cells with a rounded weighted total below 10 and compute, per
* state x county type x race, the share of flagged cells.
gen constantrd = round(constant,1)
gen below10 = constantrd<10
bysort statefip hasnormalschool white1920: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool white1920: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip white1920: egen maxfracbelow10 = max(fracbelow10)

*Drop states for which 50% or more of their normal or asylum counties have sample size less than 10

tab statefip if maxfracbelow10>=.5
drop if maxfracbelow10>=.5


* Weighted shares: weighted total of each variable over the weighted count.
local vars   unsk1940 craftoper1940 whitecollar1940 farmer1940 nonocc1940 dadunsk1920 dadcraftoper1920 dadwhitecollar1920 dad_farmer dadnonocc1920
foreach x of local vars{
gen `x'mean = `x'/constant
}

* The mobility outcome uses a denominator net of the weighted nonocc1940 total.
local vars occdiffdad
foreach x of local vars{
gen `x'mean = `x'/(constant-nonocc1940)
}

sort cty_fips
* For every outcome: a white regression with state fixed effects and
* state-clustered errors, a regression on the white1920==0 cells with
* robust errors, and a permutation test for the latter with 1000
* replications stratified by state. The permutation draws continue the
* random-number stream started by the set seed earlier in the file.
local vars occdiffdad unsk1940 craftoper1940 whitecollar1940 farmer1940 nonocc1940 dadunsk1920 dadcraftoper1920 dadwhitecollar1920 dad_farmer dadnonocc1920  
foreach x of local vars{

reghdfe `x'mean hasnormalschool if white1920==1, absorb(statefip1920) cluster(statefip1920)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1, d
tab statefip1920 if e(sample)==1
outreg2 using censustree19201940ps.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

reghdfe `x'mean hasnormalschool if white1920==0, absorb(statefip1920) vce(robust)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1, d
tab statefip1920 if e(sample)==1
outreg2 using censustree19201940blps.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

#delimit ;
permute hasnormalschool _b[hasnormalschool], strata(statefip1920) reps(1000): reg `x'mean hasnormalschool i.statefip1920 if white1920==0;
#delimit cr
}

restore

*****************************************************
*Tables A30, A31: Outcomes by parental occupation score
* One pass per value of bmedmaxoccscoreparent1920 (0, then 1). Outcomes are
* collapsed to county x race x sex cells using weightwithps (person weight
* times the subgroup inverse link probability) and regressed on the
* treatment indicator separately for each race-sex group.

forvalues i = 0(1)1{

preserve
keep if (white1920==1|black1920==1)

keep if bmedmaxoccscoreparent1920==`i'

#delimit ;
local vars constant atlhs atlsomecoll atlcoll hhincwage emp married poshhwage;
foreach x of local vars{;
gen `x'wt = `x'*weightwithps;
drop `x';
rename `x'wt `x';
};

#delimit ;
collapse (sum) constant atlhs atlsomecoll atlcoll hhincwage emp married poshhwage (mean) statefip1920 hasnormalschool, by(cty_fips1920 white1920 male1920);


#delimit cr

* Flag cells with a rounded weighted total below 10. A state-race-sex group
* is dropped when, for either value of hasnormalschool, at least half of its
* county cells are flagged.
gen constantrd = round(constant,1)
gen below10 = constantrd<10
bysort statefip hasnormalschool white1920 male1920: egen totctytype = count(hasnormalschool)
bysort statefip hasnormalschool white1920 male1920: egen totbelow10 = total(below10)
gen fracbelow10 = totbelow10/totctytype
bysort statefip white1920 male1920: egen maxfracbelow10 = max(fracbelow10)

tab statefip if maxfracbelow10>=.5
drop if maxfracbelow10>=.5


#delimit cr
* Weighted shares: weighted total of each outcome over the weighted count.
local vars atlhs atlsomecoll atlcoll emp married
foreach x of local vars{
gen `x'mean = `x'/constant
}

* Household wage income is averaged over the weighted count of households
* with positive wage income (poshhwage), then logged.
gen hhincwagemean = hhincwage/poshhwage
gen lnhhincwagemean = ln(hhincwagemean)

* Copy of the treatment indicator under the name used by the reghdfe calls
* below. The permute calls use the original name.
gen hasnormalschool1920 = hasnormalschool

sort cty_fips
* The list entry lnhhincwage resolves to lnhhincwagemean once the mean
* suffix is appended inside the loop.
local vars atlhs atlsomecoll atlcoll married lnhhincwage emp

foreach x of local vars{

* White males, then white females: state fixed effects, state-clustered errors.
reghdfe `x'mean hasnormalschool1920 if white1920==1 & male1920==1, absorb(statefip1920) cluster(statefip1920)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1
outreg2 using censustree19201940ps.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

reghdfe `x'mean hasnormalschool1920 if white1920==1 & male1920==0, absorb(statefip1920) cluster(statefip1920)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1
outreg2 using censustree19201940ps.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)


* Cells with white1920==0, males then females: robust errors, each followed
* by a permutation test with 1000 replications stratified by state.
reghdfe `x'mean hasnormalschool1920 if white1920==0 & male1920==1, absorb(statefip1920) vce(robust)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1
outreg2 using censustree19201940blps.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

#delimit ;
permute hasnormalschool _b[hasnormalschool], strata(statefip1920) reps(1000): reg `x'mean hasnormalschool i.statefip1920 if white1920==0 & male1920==1;

#delimit cr

reghdfe `x'mean hasnormalschool1920 if white1920==0 & male1920==0, absorb(statefip1920) vce(robust)
su `x'mean if e(sample)==1 & hasnormalschool==0
local mean = r(mean)
su constant if e(sample)==1
outreg2 using censustree19201940blps.xls, append excel dec(3) addstat(dvmean, `mean') adec(3)

#delimit ;
permute hasnormalschool _b[hasnormalschool], strata(statefip1920) reps(1000): reg `x'mean hasnormalschool i.statefip1920 if white1920==0 & male1920==0;

#delimit cr
}
restore
}
